Open libraries.
library("mlbench")
library("dplyr")
library("caret")
library("randomForest")
library("lattice")
library("ggplot2")
library("rpart")
library("e1071")
library("caret", lib.loc="/Library/Frameworks/R.framework/Versions/3.4/Resources/library")
library("stats")
library(relaimpo)
library(party)
Get a list of variable names.
# Collect the column names of dfNormDatAndFactor (created outside this
# section) and display them. names() already returns a plain character
# vector, so it is shown as-is.
Names <- names(dfNormDatAndFactor)
Names
[1] "Enrolling"
[2] "Sex.F"
[3] "Sex.M"
[4] "Expel.N"
[5] "Expel.Y"
[6] "First.Gen.N"
[7] "First.Gen.Y"
[8] "Challenge.Tag.N"
[9] "Challenge.Tag.Y"
[10] "Boettcher.Semi.N"
[11] "Boettcher.Semi.Y"
[12] "Boettcher.Final.N"
[13] "Boettcher.Final.Y"
[14] "Daniels.Semi.N"
[15] "Daniels.Semi.Y"
[16] "Daniels.Final.N"
[17] "Daniels.Final.Y"
[18] "Harvey.App.N"
[19] "Harvey.App.Y"
[20] "Harvey.Final.N"
[21] "Harvey.Final.Y"
[22] "FC.App.N"
[23] "FC.App.Y"
[24] "Thorson.App.N"
[25] "Thorson.App.Y"
[26] "Thorson.Admit.N"
[27] "Thorson.Admit.Y"
[28] "Summet.Participant.N"
[29] "Summet.Participant.Y"
[30] "Mines.Medal.N"
[31] "Mines.Medal.Y"
[32] "SPS.N"
[33] "SPS.Y"
[34] "Veteran.N"
[35] "Veteran.Y"
[36] "Legacy.N"
[37] "Legacy.Y"
[38] "Athlete.N"
[39] "Athlete.Y"
[40] "State.CO"
[41] "State.Other"
[42] "Citizenship.Foreign.National.International"
[43] "Citizenship.International"
[44] "Citizenship.Missing"
[45] "Citizenship.U.S..Citizen"
[46] "Citizenship.U.S..Permanent.Resident.Green.Card.Holder"
[47] "Citizenship.Undocumented.DACA"
[48] "Ethnicity.American.Indian.or.Alaska.Native"
[49] "Ethnicity.Asian"
[50] "Ethnicity.Black.or.African.American"
[51] "Ethnicity.Hispanic.or.Latino"
[52] "Ethnicity.Missing"
[53] "Ethnicity.Multiracial"
[54] "Ethnicity.Native.Hawaiian.or.Other.Pacific.Islander"
[55] "Ethnicity.NotDeclared"
[56] "Ethnicity.Unknown"
[57] "Ethnicity.White"
[58] "Major.App.Applied.Mathematics...Statistics...Computational...Applied.Mathematics"
[59] "Major.App.Applied.Mathematics...Statistics...Statistics"
[60] "Major.App.Chemical.Engineering"
[61] "Major.App.Chemical.Engineering...Biological.Engineering.Specialty"
[62] "Major.App.Chemistry"
[63] "Major.App.Chemistry...Biochemistry.Specialty"
[64] "Major.App.Chemistry...Environmental.Chemistry.Specialty"
[65] "Major.App.Civil.Engineering"
[66] "Major.App.Computer.Science"
[67] "Major.App.Economics"
[68] "Major.App.Electrical.Engineering"
[69] "Major.App.Engineering.Physics"
[70] "Major.App.Environmental.Engineering"
[71] "Major.App.Geological.Engineering"
[72] "Major.App.Geophysical.Engineering"
[73] "Major.App.Mechanical.Engineering"
[74] "Major.App.Metallurgical...Materials.Engineering"
[75] "Major.App.Mining.Engineering"
[76] "Major.App.Missing"
[77] "Major.App.Petroleum.Engineering"
[78] "Major.App.Undecided"
[79] "First.Contact.ACT"
[80] "First.Contact.Application"
[81] "First.Contact.Athlete.Form"
[82] "First.Contact.Campus.Visit"
[83] "First.Contact.College.Fair"
[84] "First.Contact.FUF"
[85] "First.Contact.Girls.Lead.the.Way"
[86] "First.Contact.GPA.Form"
[87] "First.Contact.Inquiry.Form"
[88] "First.Contact.Mailing"
[89] "First.Contact.Materials"
[90] "First.Contact.MEP"
[91] "First.Contact.Phone"
[92] "First.Contact.Preview.Mines"
[93] "First.Contact.Royall.Search"
[94] "First.Contact.SAT"
[95] "First.Contact.TOEFL"
[96] "First.Visit.Campus.Tour"
[97] "First.Visit.Campus.Visit"
[98] "First.Visit.Class.Shadow"
[99] "First.Visit.Discover.Mines"
[100] "First.Visit.Discovery.Mines.in.Your.City"
[101] "First.Visit.Girls.Lead.the.Way"
[102] "First.Visit.Launch"
[103] "First.Visit.Making.The.Connection"
[104] "First.Visit.Meet.Me.at.Mines"
[105] "First.Visit.None"
[106] "First.Visit.Preview.Mines"
[107] "First.Visit.Preview.Mines.In.Your.City"
[108] "Sport.Men.s.Varsity.Baseball"
[109] "Sport.Men.s.Varsity.Basketball"
[110] "Sport.Men.s.Varsity.Football"
[111] "Sport.Men.s.Varsity.Golf"
[112] "Sport.Men.s.Varsity.Soccer"
[113] "Sport.Men.s.Varsity.Wrestling"
[114] "Sport.None"
[115] "Sport.Varsity.Swimming.Diving"
[116] "Sport.Varsity.Track.Field"
[117] "Sport.Varsity.XCountry"
[118] "Sport.Women.s.Varsity.Basketball"
[119] "Sport.Women.s.Varsity.Soccer"
[120] "Sport.Women.s.Varsity.Softball"
[121] "Sport.Women.s.Varsity.Volleyball"
[122] "App.Created.Days"
[123] "Age"
[124] "HS.GPA"
[125] "SATR.Converted"
[126] "Review.OutsideActivity"
[127] "Review.Leadership"
[128] "Review.WorkExp"
[129] "Review.WorkEthic"
[130] "Review.ExpDiversity"
[131] "Review.DesireAttend"
[132] "Review.Affinity"
[133] "Review.InnovEntrep"
[134] "Review.Teamwork"
[135] "Review.OverallFit"
[136] "Logins.60Days"
[137] "EventCount.All"
[138] "EventCount.Admitted"
[139] "EventCount.Campus"
# Split the data into a training set (about 75% of rows, p = 0.75) and a
# testing set (the remaining rows), partitioning on the Enrolling outcome.
#
# The partition is drawn at random, so fix the RNG seed first; without it
# every run yields a different split and different results downstream.
set.seed(1234)
inTraining <- createDataPartition(dfNormDatAndFactor$Enrolling, p = 0.75, list = FALSE)

# list = FALSE makes inTraining usable directly as a row index; negative
# indexing selects the rows that were not chosen for training.
training <- dfNormDatAndFactor[inTraining, ]
testing <- dfNormDatAndFactor[-inTraining, ]

# Display both partitions.
training
testing
rfModel <- randomForest(Enrolling ~ Sex.F + Sex.M +Expel.N + Expel.Y+ First.Gen.N+ First.Gen.Y+Challenge.Tag.N + Challenge.Tag.Y+ Boettcher.Semi.N+ Boettcher.Semi.Y+ Boettcher.Final.N+Boettcher.Final.Y + Daniels.Semi.N+Daniels.Semi.Y+ Daniels.Final.N+ Daniels.Final.Y+ Harvey.App.N+ Harvey.App.Y + Harvey.Final.N+ Harvey.Final.Y+ FC.App.N+ FC.App.Y+ Thorson.App.N+ Thorson.App.Y+ Thorson.Admit.N +Thorson.Admit.Y+Summet.Participant.N+ Summet.Participant.Y + Mines.Medal.N+ Mines.Medal.Y+ SPS.N+ SPS.Y+ Veteran.N+ Veteran.Y+ Legacy.N + Legacy.Y+ Athlete.N + Athlete.N + Athlete.Y + State.CO + State.Other + Citizenship.Foreign.National.International +Citizenship.International + Citizenship.Missing+ Citizenship.U.S..Citizen+ Citizenship.U.S..Permanent.Resident.Green.Card.Holder+ Citizenship.Undocumented.DACA+ Ethnicity.American.Indian.or.Alaska.Native+ Ethnicity.Asian+ Ethnicity.Black.or.African.American+ Ethnicity.Hispanic.or.Latino + Ethnicity.Missing + Ethnicity.Multiracial +Ethnicity.Native.Hawaiian.or.Other.Pacific.Islander + Ethnicity.NotDeclared + Ethnicity.Unknown +Ethnicity.White+ Major.App.Applied.Mathematics...Statistics...Computational...Applied.Mathematics+ Major.App.Applied.Mathematics...Statistics...Statistics+ Major.App.Chemical.Engineering+ Major.App.Chemical.Engineering...Biological.Engineering.Specialty+ Major.App.Chemistry+ Major.App.Chemistry...Biochemistry.Specialty + Major.App.Chemistry...Environmental.Chemistry.Specialty + Major.App.Civil.Engineering + Major.App.Computer.Science + Major.App.Economics+ Major.App.Electrical.Engineering+ Major.App.Engineering.Physics+ Major.App.Environmental.Engineering+ Major.App.Geological.Engineering + Major.App.Geophysical.Engineering +Major.App.Mechanical.Engineering + Major.App.Metallurgical...Materials.Engineering + Major.App.Mining.Engineering + Major.App.Missing + Major.App.Petroleum.Engineering+ Major.App.Undecided+ First.Contact.ACT + First.Contact.Application + First.Contact.Athlete.Form + 
First.Contact.Campus.Visit + First.Contact.College.Fair + First.Contact.FUF+First.Contact.Girls.Lead.the.Way+First.Contact.GPA.Form + First.Contact.Inquiry.Form+First.Contact.Mailing+ First.Contact.Materials + First.Contact.MEP+ First.Contact.Phone+ First.Contact.Preview.Mines+ First.Contact.Royall.Search+First.Contact.TOEFL+ First.Contact.SAT+ First.Visit.Campus.Tour+ First.Visit.Campus.Visit+ First.Visit.Class.Shadow + First.Visit.Discover.Mines+ First.Visit.Discovery.Mines.in.Your.City+ First.Visit.Girls.Lead.the.Way+ First.Visit.Launch + First.Visit.Making.The.Connection + First.Visit.Meet.Me.at.Mines+ First.Visit.None+ First.Visit.Preview.Mines + First.Visit.Preview.Mines.In.Your.City + Sport.Men.s.Varsity.Baseball+ Sport.Men.s.Varsity.Basketball+ Sport.Men.s.Varsity.Football + Sport.Men.s.Varsity.Golf +Sport.Men.s.Varsity.Soccer+ Sport.Men.s.Varsity.Wrestling+ Sport.None+ Sport.Varsity.Swimming.Diving+ Sport.Varsity.Track.Field+Sport.Varsity.XCountry+ Sport.Women.s.Varsity.Basketball + Sport.Women.s.Varsity.Soccer+ Sport.Women.s.Varsity.Softball + Sport.Women.s.Varsity.Volleyball+ App.Created.Days + Age + HS.GPA+ SATR.Converted +Review.OutsideActivity +Review.Leadership+Review.WorkExp+ Review.WorkEthic +Review.ExpDiversity +Review.DesireAttend +Review.Affinity+ Review.InnovEntrep+Review.Teamwork +Review.OverallFit + Logins.60Days+EventCount.All+EventCount.Admitted+EventCount.Campus, data = training)
rfModel
Call:
randomForest(formula = Enrolling ~ Sex.F + Sex.M + Expel.N + Expel.Y + First.Gen.N + First.Gen.Y + Challenge.Tag.N + Challenge.Tag.Y + Boettcher.Semi.N + Boettcher.Semi.Y + Boettcher.Final.N + Boettcher.Final.Y + Daniels.Semi.N + Daniels.Semi.Y + Daniels.Final.N + Daniels.Final.Y + Harvey.App.N + Harvey.App.Y + Harvey.Final.N + Harvey.Final.Y + FC.App.N + FC.App.Y + Thorson.App.N + Thorson.App.Y + Thorson.Admit.N + Thorson.Admit.Y + Summet.Participant.N + Summet.Participant.Y + Mines.Medal.N + Mines.Medal.Y + SPS.N + SPS.Y + Veteran.N + Veteran.Y + Legacy.N + Legacy.Y + Athlete.N + Athlete.N + Athlete.Y + State.CO + State.Other + Citizenship.Foreign.National.International + Citizenship.International + Citizenship.Missing + Citizenship.U.S..Citizen + Citizenship.U.S..Permanent.Resident.Green.Card.Holder + Citizenship.Undocumented.DACA + Ethnicity.American.Indian.or.Alaska.Native + Ethnicity.Asian + Ethnicity.Black.or.African.American + Ethnicity.Hispanic.or.Latino + Ethnicity.Missing + Ethnicity.Multiracial + Ethnicity.Native.Hawaiian.or.Other.Pacific.Islander + Ethnicity.NotDeclared + Ethnicity.Unknown + Ethnicity.White + Major.App.Applied.Mathematics...Statistics...Computational...Applied.Mathematics + Major.App.Applied.Mathematics...Statistics...Statistics + Major.App.Chemical.Engineering + Major.App.Chemical.Engineering...Biological.Engineering.Specialty + Major.App.Chemistry + Major.App.Chemistry...Biochemistry.Specialty + Major.App.Chemistry...Environmental.Chemistry.Specialty + Major.App.Civil.Engineering + Major.App.Computer.Science + Major.App.Economics + Major.App.Electrical.Engineering + Major.App.Engineering.Physics + Major.App.Environmental.Engineering + Major.App.Geological.Engineering + Major.App.Geophysical.Engineering + Major.App.Mechanical.Engineering + Major.App.Metallurgical...Materials.Engineering + Major.App.Mining.Engineering + Major.App.Missing + Major.App.Petroleum.Engineering + Major.App.Undecided + First.Contact.ACT + 
First.Contact.Application + First.Contact.Athlete.Form + First.Contact.Campus.Visit + First.Contact.College.Fair + First.Contact.FUF + First.Contact.Girls.Lead.the.Way + First.Contact.GPA.Form + First.Contact.Inquiry.Form + First.Contact.Mailing + First.Contact.Materials + First.Contact.MEP + First.Contact.Phone + First.Contact.Preview.Mines + First.Contact.Royall.Search + First.Contact.TOEFL + First.Contact.SAT + First.Visit.Campus.Tour + First.Visit.Campus.Visit + First.Visit.Class.Shadow + First.Visit.Discover.Mines + First.Visit.Discovery.Mines.in.Your.City + First.Visit.Girls.Lead.the.Way + First.Visit.Launch + First.Visit.Making.The.Connection + First.Visit.Meet.Me.at.Mines + First.Visit.None + First.Visit.Preview.Mines + First.Visit.Preview.Mines.In.Your.City + Sport.Men.s.Varsity.Baseball + Sport.Men.s.Varsity.Basketball + Sport.Men.s.Varsity.Football + Sport.Men.s.Varsity.Golf + Sport.Men.s.Varsity.Soccer + Sport.Men.s.Varsity.Wrestling + Sport.None + Sport.Varsity.Swimming.Diving + Sport.Varsity.Track.Field + Sport.Varsity.XCountry + Sport.Women.s.Varsity.Basketball + Sport.Women.s.Varsity.Soccer + Sport.Women.s.Varsity.Softball + Sport.Women.s.Varsity.Volleyball + App.Created.Days + Age + HS.GPA + SATR.Converted + Review.OutsideActivity + Review.Leadership + Review.WorkExp + Review.WorkEthic + Review.ExpDiversity + Review.DesireAttend + Review.Affinity + Review.InnovEntrep + Review.Teamwork + Review.OverallFit + Logins.60Days + EventCount.All + EventCount.Admitted + EventCount.Campus, data = training)
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 11
OOB estimate of error rate: 8.62%
Confusion matrix:
N Y class.error
N 3563 127 0.03441734
Y 276 711 0.27963526
# Predict the outcome for each testing row with the full model, then
# cross-tabulate predictions (rows) against the observed Enrolling values
# (columns).
rfModel.prediction <- stats::predict(rfModel, newdata = testing)
table(rfModel.prediction, testing$Enrolling)
rfModel.prediction N Y
N 1181 96
Y 48 233
To determine variable importance.
# Extract the variable-importance matrix from the full model and display it.
imprfModel <- randomForest::importance(rfModel)
imprfModel
MeanDecreaseGini
Sex.F 7.437706e+00
Sex.M 7.970186e+00
Expel.N 8.659331e-01
Expel.Y 9.745556e-01
First.Gen.N 5.083216e+00
First.Gen.Y 5.162621e+00
Challenge.Tag.N 2.142498e+00
Challenge.Tag.Y 2.090986e+00
Boettcher.Semi.N 1.268805e+00
Boettcher.Semi.Y 1.088516e+00
Boettcher.Final.N 4.735714e-01
Boettcher.Final.Y 4.705432e-01
Daniels.Semi.N 1.967173e+00
Daniels.Semi.Y 1.647909e+00
Daniels.Final.N 4.607632e-01
Daniels.Final.Y 3.839780e-01
Harvey.App.N 4.454943e+00
Harvey.App.Y 4.307206e+00
Harvey.Final.N 1.489947e+00
Harvey.Final.Y 1.734864e+00
FC.App.N 2.932341e+00
FC.App.Y 3.056475e+00
Thorson.App.N 6.108025e+00
Thorson.App.Y 5.748607e+00
Thorson.Admit.N 5.450054e+00
Thorson.Admit.Y 5.647499e+00
Summet.Participant.N 8.726508e-01
Summet.Participant.Y 8.859990e-01
Mines.Medal.N 9.469697e-05
Mines.Medal.Y 3.118012e-04
SPS.N 1.104065e+00
SPS.Y 1.168917e+00
Veteran.N 3.948454e+00
Veteran.Y 3.933734e+00
Legacy.N 6.212300e+00
Legacy.Y 6.580279e+00
Athlete.N 6.485726e+00
Athlete.Y 6.714595e+00
State.CO 3.247304e+01
State.Other 2.920199e+01
Citizenship.Foreign.National.International 3.876040e+00
Citizenship.International 3.288128e-01
Citizenship.Missing 2.853720e+00
Citizenship.U.S..Citizen 5.462649e+00
Citizenship.U.S..Permanent.Resident.Green.Card.Holder 1.888712e+00
Citizenship.Undocumented.DACA 1.307468e-01
Ethnicity.American.Indian.or.Alaska.Native 1.831798e+00
Ethnicity.Asian 4.660927e+00
Ethnicity.Black.or.African.American 1.244916e+00
Ethnicity.Hispanic.or.Latino 3.141938e+00
Ethnicity.Missing 4.584504e-01
Ethnicity.Multiracial 0.000000e+00
Ethnicity.Native.Hawaiian.or.Other.Pacific.Islander 1.931345e-01
Ethnicity.NotDeclared 3.649621e+00
Ethnicity.Unknown 0.000000e+00
Ethnicity.White 7.932935e+00
Major.App.Applied.Mathematics...Statistics...Computational...Applied.Mathematics 1.583910e+00
Major.App.Applied.Mathematics...Statistics...Statistics 8.705972e-01
Major.App.Chemical.Engineering 7.347701e+00
Major.App.Chemical.Engineering...Biological.Engineering.Specialty 5.192139e+00
Major.App.Chemistry 1.054614e+00
Major.App.Chemistry...Biochemistry.Specialty 1.475858e+00
Major.App.Chemistry...Environmental.Chemistry.Specialty 2.618606e-01
Major.App.Civil.Engineering 4.720395e+00
Major.App.Computer.Science 4.695704e+00
Major.App.Economics 1.010823e+00
Major.App.Electrical.Engineering 3.748680e+00
Major.App.Engineering.Physics 5.395229e+00
Major.App.Environmental.Engineering 3.140402e+00
Major.App.Geological.Engineering 2.851079e+00
Major.App.Geophysical.Engineering 7.893425e-01
Major.App.Mechanical.Engineering 8.485352e+00
Major.App.Metallurgical...Materials.Engineering 2.134452e+00
Major.App.Mining.Engineering 2.443291e+00
Major.App.Missing 2.788781e+00
Major.App.Petroleum.Engineering 4.397457e+00
Major.App.Undecided 5.302115e+00
First.Contact.ACT 6.692925e+00
First.Contact.Application 8.421762e+00
First.Contact.Athlete.Form 1.114967e+00
First.Contact.Campus.Visit 1.347537e+01
First.Contact.College.Fair 2.625882e-01
First.Contact.FUF 7.978287e+00
First.Contact.Girls.Lead.the.Way 5.371549e-01
First.Contact.GPA.Form 8.735425e-01
First.Contact.Inquiry.Form 2.369550e+00
First.Contact.Mailing 5.415302e-01
First.Contact.Materials 3.451884e-01
First.Contact.MEP 8.260278e-01
First.Contact.Phone 2.024735e-02
First.Contact.Preview.Mines 1.464733e+00
First.Contact.Royall.Search 1.072953e+01
First.Contact.TOEFL 7.034645e-01
First.Contact.SAT 1.420114e+00
First.Visit.Campus.Tour 1.370637e+01
First.Visit.Campus.Visit 1.480266e+01
First.Visit.Class.Shadow 2.329774e+00
First.Visit.Discover.Mines 7.929069e+00
First.Visit.Discovery.Mines.in.Your.City 1.363295e+00
First.Visit.Girls.Lead.the.Way 5.880593e-01
First.Visit.Launch 4.658352e+01
First.Visit.Making.The.Connection 4.584449e+00
First.Visit.Meet.Me.at.Mines 1.263360e+00
First.Visit.None 6.927932e+01
First.Visit.Preview.Mines 6.506139e+00
First.Visit.Preview.Mines.In.Your.City 1.782319e+01
Sport.Men.s.Varsity.Baseball 1.934009e-01
Sport.Men.s.Varsity.Basketball 4.905744e-01
Sport.Men.s.Varsity.Football 2.877780e+00
Sport.Men.s.Varsity.Golf 2.163090e-01
Sport.Men.s.Varsity.Soccer 4.367005e-01
Sport.Men.s.Varsity.Wrestling 6.442545e-01
Sport.None 6.618847e+00
Sport.Varsity.Swimming.Diving 1.314591e-02
Sport.Varsity.Track.Field 2.996113e-01
Sport.Varsity.XCountry 9.709506e-02
Sport.Women.s.Varsity.Basketball 4.858711e-01
Sport.Women.s.Varsity.Soccer 3.653904e-01
Sport.Women.s.Varsity.Softball 1.199113e-02
Sport.Women.s.Varsity.Volleyball 5.702375e-01
App.Created.Days 5.761980e+01
Age 4.282867e+01
HS.GPA 6.344325e+01
SATR.Converted 4.569617e+01
Review.OutsideActivity 1.585565e+01
Review.Leadership 1.912368e+01
Review.WorkExp 1.929378e+01
Review.WorkEthic 1.537185e+01
Review.ExpDiversity 1.606620e+01
Review.DesireAttend 2.316809e+01
Review.Affinity 1.992175e+01
Review.InnovEntrep 1.495064e+01
Review.Teamwork 1.910074e+01
Review.OverallFit 2.497267e+01
Logins.60Days 1.379456e+00
EventCount.All 1.642425e+02
EventCount.Admitted 2.249928e+02
EventCount.Campus 1.435702e+02
# Optional: view importances from largest to smallest. Index with order()
# rather than calling sort() on the matrix, so the variable names (row names)
# are kept:
# imprfModel[order(imprfModel[, "MeanDecreaseGini"], decreasing = TRUE), , drop = FALSE]

# Data-frame copy of the importance matrix.
dfimprfModel <- as.data.frame(imprfModel)
dfimprfModel

# Accuracy and per-class statistics for the full model on the testing set,
# computed from the predicted-vs-observed table.
caret::confusionMatrix(table(rfModel.prediction, testing$Enrolling))
Confusion Matrix and Statistics
rfModel.prediction N Y
N 1181 96
Y 48 233
Accuracy : 0.9076
95% CI : (0.8921, 0.9215)
No Information Rate : 0.7888
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.7069
Mcnemar's Test P-Value : 8.978e-05
Sensitivity : 0.9609
Specificity : 0.7082
Pos Pred Value : 0.9248
Neg Pred Value : 0.8292
Prevalence : 0.7888
Detection Rate : 0.7580
Detection Prevalence : 0.8196
Balanced Accuracy : 0.8346
'Positive' Class : N
Plot of variable importance.
# Plot variable importance for the full random forest model.
# The text/point sizes and colours are graphical settings passed along with
# the call.
varImpPlot(
  rfModel,
  main = "Variable Importance for Random Forest Model",
  cex = .6,
  pt.cex = .7,
  color = "navy blue",
  gcolor = par("fg"),
  lcolor = "gray"
)
# Fit a reduced random forest for Enrolling on a hand-picked subset of 31
# predictors from `training`.
#
# Tree construction is randomised. Seeding the fit makes it repeatable, so
# differences from the other models reflect the predictor set rather than
# RNG state.
set.seed(1234)
rfModelLess <- randomForest(
  Enrolling ~ State.CO + State.Other + Major.App.Mechanical.Engineering +
    First.Contact.Application + First.Contact.Campus.Visit +
    First.Contact.FUF + First.Contact.Royall.Search +
    First.Visit.Campus.Tour + First.Visit.Campus.Visit +
    First.Visit.Discover.Mines + First.Visit.Launch + First.Visit.None +
    First.Visit.Preview.Mines.In.Your.City +
    App.Created.Days + Age + HS.GPA + SATR.Converted +
    Review.OutsideActivity + Review.Leadership + Review.WorkExp +
    Review.WorkEthic + Review.ExpDiversity + Review.DesireAttend +
    Review.Affinity + Review.InnovEntrep + Review.Teamwork +
    Review.OverallFit +
    Ethnicity.White +
    EventCount.All + EventCount.Admitted + EventCount.Campus,
  data = training
)

# Print the model summary.
rfModelLess
Call:
randomForest(formula = Enrolling ~ State.CO + State.Other + Major.App.Mechanical.Engineering + First.Contact.Application + First.Contact.Campus.Visit + First.Contact.FUF + First.Contact.Royall.Search + First.Visit.Campus.Tour + First.Visit.Campus.Visit + First.Visit.Discover.Mines + First.Visit.Launch + First.Visit.None + First.Visit.Preview.Mines.In.Your.City + App.Created.Days + Age + HS.GPA + SATR.Converted + Review.OutsideActivity + Review.Leadership + Review.WorkExp + Review.WorkEthic + Review.ExpDiversity + Review.DesireAttend + Review.Affinity + Review.InnovEntrep + Review.Teamwork + Review.OverallFit + Ethnicity.White + EventCount.All + EventCount.Admitted + EventCount.Campus, data = training)
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 5
OOB estimate of error rate: 8.51%
Confusion matrix:
N Y class.error
N 3563 127 0.03441734
Y 271 716 0.27456940
# Predict the outcome for each testing row with the reduced model, then
# cross-tabulate predictions (rows) against the observed Enrolling values
# (columns).
rfModelLess.prediction <- stats::predict(rfModelLess, newdata = testing)
table(rfModelLess.prediction, testing$Enrolling)
rfModelLess.prediction N Y
N 1183 98
Y 46 231
To determine variable importance.
# Extract the variable-importance matrix from the reduced model.
imprfModelLess <- randomForest::importance(rfModelLess)

# Optional: show the values in fixed (non-scientific) notation.
# format(imprfModelLess, scientific = FALSE)

imprfModelLess
MeanDecreaseGini
State.CO 30.878949
State.Other 29.257083
Major.App.Mechanical.Engineering 12.579230
First.Contact.Application 11.785755
First.Contact.Campus.Visit 15.618481
First.Contact.FUF 9.576268
First.Contact.Royall.Search 15.563429
First.Visit.Campus.Tour 15.820761
First.Visit.Campus.Visit 19.779964
First.Visit.Discover.Mines 9.325906
First.Visit.Launch 55.597020
First.Visit.None 82.567953
First.Visit.Preview.Mines.In.Your.City 16.833940
App.Created.Days 85.972986
Age 65.084346
HS.GPA 98.868442
SATR.Converted 72.984411
Review.OutsideActivity 21.638767
Review.Leadership 29.026726
Review.WorkExp 28.769419
Review.WorkEthic 22.232976
Review.ExpDiversity 22.870812
Review.DesireAttend 32.226286
Review.Affinity 29.324082
Review.InnovEntrep 23.136576
Review.Teamwork 28.812416
Review.OverallFit 37.286821
Ethnicity.White 14.279990
EventCount.All 193.936731
EventCount.Admitted 238.199556
EventCount.Campus 157.806502
# Accuracy and per-class statistics for the reduced model on the testing set,
# computed from the predicted-vs-observed table.
caret::confusionMatrix(
  table(rfModelLess.prediction, testing$Enrolling)
)
Confusion Matrix and Statistics
rfModelLess.prediction N Y
N 1183 98
Y 46 231
Accuracy : 0.9076
95% CI : (0.8921, 0.9215)
No Information Rate : 0.7888
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.7055
Mcnemar's Test P-Value : 2.138e-05
Sensitivity : 0.9626
Specificity : 0.7021
Pos Pred Value : 0.9235
Neg Pred Value : 0.8339
Prevalence : 0.7888
Detection Rate : 0.7593
Detection Prevalence : 0.8222
Balanced Accuracy : 0.8323
'Positive' Class : N
# Fit a smaller random forest for Enrolling on a hand-picked predictor subset
# from `training`.
# NOTE(review): despite the "Top25" name, the formula below contains 26
# predictors. Confirm which one (if any) was meant to be dropped.
#
# Tree construction is randomised. Seeding the fit makes it repeatable, so
# differences from the other models reflect the predictor set rather than
# RNG state.
set.seed(1234)
rfModelTop25 <- randomForest(
  Enrolling ~ State.CO + State.Other +
    First.Contact.Campus.Visit +
    First.Visit.Campus.Tour + First.Visit.Campus.Visit +
    First.Visit.Discover.Mines + First.Visit.Launch + First.Visit.None +
    First.Visit.Preview.Mines.In.Your.City +
    App.Created.Days + Age + HS.GPA + SATR.Converted +
    Review.OutsideActivity + Review.Leadership + Review.WorkExp +
    Review.WorkEthic + Review.ExpDiversity + Review.DesireAttend +
    Review.Affinity + Review.InnovEntrep + Review.Teamwork +
    Review.OverallFit +
    EventCount.All + EventCount.Admitted + EventCount.Campus,
  data = training
)

# Print the model summary.
rfModelTop25
Call:
randomForest(formula = Enrolling ~ State.CO + State.Other + First.Contact.Campus.Visit + First.Visit.Campus.Tour + First.Visit.Campus.Visit + First.Visit.Discover.Mines + First.Visit.Launch + First.Visit.None + First.Visit.Preview.Mines.In.Your.City + App.Created.Days + Age + HS.GPA + SATR.Converted + Review.OutsideActivity + Review.Leadership + Review.WorkExp + Review.WorkEthic + Review.ExpDiversity + Review.DesireAttend + Review.Affinity + Review.InnovEntrep + Review.Teamwork + Review.OverallFit + EventCount.All + EventCount.Admitted + EventCount.Campus, data = training)
Type of random forest: classification
Number of trees: 500
No. of variables tried at each split: 5
OOB estimate of error rate: 8.66%
Confusion matrix:
N Y class.error
N 3565 125 0.03387534
Y 280 707 0.28368794
# Predict the outcome for each testing row with the Top25 model, then
# cross-tabulate predictions (rows) against the observed Enrolling values
# (columns).
rfModelTop25.prediction <- stats::predict(rfModelTop25, newdata = testing)
table(rfModelTop25.prediction, testing$Enrolling)
rfModelTop25.prediction N Y
N 1182 98
Y 47 231
To determine variable importance.
# Extract the variable-importance matrix from the Top25 model.
imprfModelTop25 <- randomForest::importance(rfModelTop25)

# Optional: show the values in fixed (non-scientific) notation.
# format(imprfModelTop25, scientific = FALSE)

imprfModelTop25
MeanDecreaseGini
State.CO 32.81859
State.Other 29.26938
First.Contact.Campus.Visit 16.71002
First.Visit.Campus.Tour 16.26760
First.Visit.Campus.Visit 21.30551
First.Visit.Discover.Mines 10.33880
First.Visit.Launch 53.72968
First.Visit.None 76.59342
First.Visit.Preview.Mines.In.Your.City 17.63941
App.Created.Days 97.41080
Age 70.79303
HS.GPA 108.79595
SATR.Converted 79.43763
Review.OutsideActivity 22.15891
Review.Leadership 30.39901
Review.WorkExp 31.24720
Review.WorkEthic 23.82738
Review.ExpDiversity 23.54627
Review.DesireAttend 32.95253
Review.Affinity 30.80140
Review.InnovEntrep 24.81996
Review.Teamwork 30.19490
Review.OverallFit 39.41873
EventCount.All 198.86630
EventCount.Admitted 252.81695
EventCount.Campus 157.31704
# Accuracy and per-class statistics for the Top25 model on the testing set,
# computed from the predicted-vs-observed table.
caret::confusionMatrix(
  table(rfModelTop25.prediction, testing$Enrolling)
)
Confusion Matrix and Statistics
rfModelTop25.prediction N Y
N 1182 98
Y 47 231
Accuracy : 0.9069
95% CI : (0.8914, 0.9209)
No Information Rate : 0.7888
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.7038
Mcnemar's Test P-Value : 3.292e-05
Sensitivity : 0.9618
Specificity : 0.7021
Pos Pred Value : 0.9234
Neg Pred Value : 0.8309
Prevalence : 0.7888
Detection Rate : 0.7587
Detection Prevalence : 0.8216
Balanced Accuracy : 0.8319
'Positive' Class : N
# Place the Top25 model's predictions alongside the testing rows so each
# observation can be inspected together with its predicted class.
# The prediction is passed first, so it becomes the leading column.
PredictionTop25 <- cbind(rfModelTop25.prediction,testing)
# Display the combined table.
PredictionTop25